#!/bin/bash
#NB "strict" mode is used if run as a script
#
# Copyright (c) 2020-2022 NVI, Inc.
#
# This file is part of VLBI Field System
# (see http://github.com/nvi-inc/fs).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# Release identifier of this script; reported by the '-v' option.
VERSION=2022-10-05

# Print the version banner "[plog <VERSION>]" on stdout.
# Any arguments are ignored.
version() {
    printf '[plog %s]\n' "$VERSION"
}

# Field System directory layout. Every location can be overridden from the
# environment; an unset or empty value falls back to a path below FS_BASE.
: "${FS_BASE:=/usr2}"
: "${FS_PATH:=${FS_BASE}/fs}"
: "${FS_LOG_PATH:=${FS_BASE}/log}"
: "${FS_PROC_PATH:=${FS_BASE}/proc}"
: "${FS_SCHED_PATH:=${FS_BASE}/sched}"

# Directory holding ".netrc". Only when NETRC_DIR is given explicitly is curl
# told where to look (--netrc-file); otherwise $HOME is used and curl's own
# default lookup applies.
if [[ -n "${NETRC_DIR:-}" ]]; then
    CURL_NETRC_FILE_OPTION="--netrc-file $NETRC_DIR/.netrc"
else
    NETRC_DIR=$HOME
    CURL_NETRC_FILE_OPTION=
fi

# Lines to remove from log file to make "reduced" logs
# Use egrep syntax
: "${PLOG_REDUCE_PATTERN:=^[:.0-9]*#(rdtc|dbtcn)}"

# Added to filename before extension of log files with all data
: "${PLOG_FULL_SUFFIX:=_full}"

# Print the complete help text (shown for the '-h' option) on stdout.
#
# The here-document below is unquoted, so "$0", "$FS_LOG_PATH" and the
# styling variables (${black}, ${blue}, ${underline}, ${normal}) are expanded
# at the time the function is called, not when it is defined. The styling
# variables are assigned further down in this file and are empty strings
# unless stdout is a colour-capable terminal.
usage_long() {
    cat <<EOF
Push log files to the data centers.

USAGE:
    ${black}$0 [options] ARG [ARG...]${normal}

ARG can be an experiment ID or the path of a file.

${underline}Options:${normal}
    ${black}-l${normal}            Use latest log in $FS_LOG_PATH (other than 'station.log'
                  or 'point.log')
    ${black}-t${normal}            Test run: print commands, do not actually push files to server
    ${black}-h${normal}            Print this message
    ${black}-c CENTER${normal}     Push files to data CENTER. Overrides the DATA_CENTERS
                  environment variable. Flag can be given multiple times.
                  Upper and lower case values are accepted.
    ${black}-p${normal}            Also push proc file
    ${black}-q${normal}            Quiet mode
    ${black}-z${normal}            Push a compressed log (only full if log contains multicast
                  data). Can't be used with '-r'.
    ${black}-r${normal}            Push only a reduced log if log contains multicast log data
                  Ignored for a non-multicast log. Can't be used with '-z' in
                  either case.
    ${black}-v${normal}            Print plog version and exit
    ${black}-g${normal}            Force gzip compression/uncompression

If the log contains RDBE or DBBC3 multicast data:

1.  If not already done, the log file will be renamed to
    '$FS_LOG_PATH/<log>_full.log'.

2.  If neither '-r' or '-z' is supplied both a reduced log and a
    compressed full log will be created on /tmp and uploaded.

3.  If '-r' is supplied, only a reduced log file, without multicast
    data, created in /tmp and is transferred to the data center.

4.  If '-z' is supplied, only a compressed full log file is created in
    /tmp and transferred to the data center.

To see progress when compressing and reducing, install the 'pv' package.

plog requires the STATION environment variable be set to the
two-letter station code. Upper and lower case values are accepted, but
they are mapped to lower case. E.g. add the following ~/.login (tcsh)
or ~/.profile (bash):

    setenv STATION gs ${blue}#tcsh${normal}
    export STATION=gs ${blue}#bash${normal}

Data centers are specified in the DATA_CENTERS environment variable
which can contain CDDIS, OPAR, BKG, and HAYSTACK (VGOS only). Multiple
centers are separated with a space. Upper and lower case values are
accepted. E.g. add the following to your login script

    setenv DATA_CENTERS "CDDIS HAYSTACK" ${blue}#tcsh${normal}
    export DATA_CENTERS="CDDIS HAYSTACK" ${blue}#bash${normal}

If DATA_CENTERS is empty, plog defaults to CDDIS.

Data center login must be configured in ".netrc" which by default is in "~".
For example, for CDDIS:

    machine urs.earthdata.nasa.gov
        login mycddisuser
        password secret

See "man 5 netrc" for more details on ".netrc".

If your curl supports the "--netrc-file" option (see "man curl") , you can
change the directory for ".netrc" by setting the NETRC_DIR environment
variable.  E.g. add the following to your login script

    setenv NETRC_DIR "/usr2/control" ${blue}#tcsh${normal}
    export NETRC_DIR="/usr2/control" ${blue}#bash${normal}

The default transfer method from BKG is ftp-ssl.  If you have FSL8 or
another old distribution that does not support curl ftp-ssl with BKG,
but worked with the previous versions of plog that used non-ssl ftp
for BKG, you can restore the previous behaviour with:

    setenv PLOG_BKG_METHOD ftp ${blue}#tcsh${normal}
    export PLOG_BKG_METHOD=ftp ${blue}#bash${normal}

However, this method will no longer work when BKG stops support
of non-ssl ftp, which is expected at the end of May 2022.

plog defaults to using bzip2 compression, if it is available, for full
log files. Otherwise gzip is used. The use of gzip can be forced with
the command line option -g or setting the PLOG_COMPRESSED_EXT
environment variable to gz:

    setenv PLOG_COMPRESSED_EXT gz  ${blue}#tcsh${normal}
    export PLOG_COMPRESSED_EXT=gz  ${blue}#bash${normal}

Examples of usage:
    ${blue}# push log for latest session, sending reduced if needed${normal}
    plog -l
    ${blue}# push log and proc files for session vgp007${normal}
    plog -p vgp007
    ${blue}# push (full) compressed log for vgp007${normal}
    plog -z vgp007
    ${blue}# push both reduced and compressed multicast log for vgp007${normal}
    plog vgp007
    ${blue}# push everything in /usr2/log to Haystack${normal}
    plog -c HAYSTACK /usr2/log/*.log

EOF
}

# Terminal styling sequences used by the help text and by fatal/warn/info.
# They all start out empty so that every reference is defined (also under
# 'set -u') and output stays plain when stdout is not a terminal.
bold=
underline=
standout=
normal=
black=
red=
green=
yellow=
blue=
magenta=
cyan=
white=
# Check if stdout is a terminal
if test -t 1; then
    # see if it supports colors...
    # tput fails when TERM is unset/unknown or terminfo is missing; in that
    # case stay silent and keep the plain (empty) styling instead of leaking
    # an error message or tripping errexit in a sourcing script.
    ncolors=$(tput colors 2>/dev/null) || ncolors=
    # Require a plain non-negative number ("-1" means no colour support).
    if [[ "$ncolors" =~ ^[0-9]+$ ]] && [[ "$ncolors" -ge 8 ]]; then
        bold="$(tput bold)"
        underline="$(tput smul)"
        standout="$(tput smso)"
        normal="$(tput sgr0)"
        black="$(tput setaf 0)"
        red="$(tput setaf 1)"
        green="$(tput setaf 2)"
        yellow="$(tput setaf 3)"
        blue="$(tput setaf 4)"
        magenta="$(tput setaf 5)"
        cyan="$(tput setaf 6)"
        white="$(tput setaf 7)"
    fi
fi


# Print the short synopsis on stdout (callers redirect it to stderr when it
# accompanies an error). Lists every option accepted by main's getopts
# string; ARG is optional because '-l' alone is a valid invocation.
usage(){
    cat <<EOF
Usage: $0 [-h] [-v] [-l] [-t] [-q] [-p] [-g] [-z | -r] [-c CENTER]... [ARG ...]
Push log file(s) to the data centers.
At least one ARG (experiment ID or file path) is required unless -l is given.
Run '$0 -h' for the full description of the options.
EOF
}

# Report an error on stderr and terminate with exit status 1.
# All arguments are joined into one message; backslash escapes such as "\n"
# inside the message are interpreted (echo -e).
fatal(){
    local message="$*"
    {
        echo -e "${red}ERROR:${normal} plog:" "$message"
    } >&2
    exit 1
}

# Print a warning on stderr unless quiet mode (QUIET non-empty) is active.
# Arguments are joined with spaces; backslash escapes are interpreted.
warn(){
    if [[ -n "$QUIET" ]]; then
        return 0
    fi
    echo -e "${yellow}WARN:${normal} plog:" "$@" >&2
}
# Print an informational message on stderr unless quiet mode (QUIET
# non-empty) is active. Arguments are joined with spaces; backslash escapes
# are interpreted.
info(){
    if [[ -n "$QUIET" ]]; then
        return 0
    fi
    echo -e "${green}INFO:${normal} plog:" "$@" >&2
}

# Stream the given files to stdout, with a progress meter when possible.
#
# Uses pv (Pipe Viewer) if it is installed and quiet mode is off; otherwise
# falls back to plain cat. With no arguments stdin is copied, as with cat.
# Globals:   QUIET (read), PV (written: path of pv, or empty if not found)
# Arguments: files to stream
# Returns:   exit status of cat or pv
safe_pv() {
    # 'command -v' is a builtin, so the lookup does not depend on an external
    # 'which'; a failed lookup simply leaves PV empty.
    PV=$(command -v pv) || PV=
    if [[ -n "${QUIET:-}" || -z "$PV" ]]; then
        # Quoted so that paths containing whitespace stay intact.
        cat "$@"
        return
    fi
    "$PV" "$@"
}

## Put Commands
# Helper functions for data center commands
# follow signatures
# f URL file [file...]

# Upload files to an FTP server with a single curl invocation.
#
# The files are handed to curl as one "{file1,file2,...}" upload glob, and
# credentials are taken from .netrc.
# Globals:   NETRC_DIR, DRY, SSL_REQUIRED, CURL_NETRC_FILE_OPTION (read)
# Arguments: $1 - FTP host (used as ftp://$1/); $2... - files to upload
# Outputs:   in test mode (DRY=echo) the curl command line is printed
# Returns:   exit status of curl; exits via fatal on a failed precondition
pftp() {
    if ! command -v curl > /dev/null; then
        fatal "'curl' not found"
    fi

    if [[ ! -e "$NETRC_DIR/.netrc" ]]; then
        fatal "$NETRC_DIR/.netrc not found, see usage"
    fi

    local URL="$1"; shift
    if [[ $# -eq 0 ]]; then
        fatal "no files given to upload to $URL"
    fi

    # Join the file names with commas in a subshell so the changed IFS cannot
    # affect the word splitting of the option variables below.
    local -a files=("$@")
    local joined
    joined=$(IFS=','; printf '%s' "${files[*]}")

    # No eval: the glob is passed as exactly one argument, so file names with
    # spaces or shell metacharacters are not re-parsed by the shell.
    # DRY, SSL_REQUIRED and CURL_NETRC_FILE_OPTION are intentionally unquoted:
    # each may be empty or hold several words.
    ${DRY:-} curl $SSL_REQUIRED -n $CURL_NETRC_FILE_OPTION -T "{$joined}" "ftp://$URL/"
}

# Copy files to a remote destination with scp.
# Globals:   DRY (read; "echo" in test mode prints the command instead)
# Arguments: $1 - scp destination (host:path); $2... - files to copy
# Returns:   exit status of scp
pscp() {
    local URL="$1"; shift
    # "$@" keeps each file name as a single argument even if it contains
    # whitespace; DRY is intentionally unquoted because it may be empty.
    ${DRY:-} scp "$@" "$URL"
}
# Data center command for the DUMMY center: uploads nothing and just prints
# the files it was given, separated by single spaces.
dummy() {
    # Quoted so names are shown exactly as queued (no re-splitting, no glob
    # expansion of the arguments).
    echo "$@"
}

## Utility functions
# Format an array with first arg as template. Used for curl.
#
# Arguments: $1 - printf format containing one conversion (e.g. '%s')
#            $2... - items; the format is applied once to each item
# Outputs:   the formatted items, concatenated, on stdout (nothing when no
#            items are given)
fmtarray() {
    local FMT="$1"; shift
    local item
    # "$@" keeps items with embedded whitespace in one piece; the loop
    # variable is local so no stray global is left behind.
    for item in "$@"; do
        # The format string is supplied by the caller on purpose.
        # shellcheck disable=SC2059
        printf "$FMT" "$item"
    done
}
# Add file(s) to the upload queue and report them via info.
# Globals:   FILES (appended to)
# Arguments: one or more file paths; each argument becomes exactly one queue
#            entry, even when the path contains whitespace
add() {
    FILES+=("$@")
    info queued "$@"
}

# Base URL of the CDDIS upload service; cddis() appends "/login" and
# "/upload/" to it.
CDDIS_URL="https://depot.cddis.eosdis.nasa.gov/CDDIS_FileUpload"
# Data center command for CDDIS: log in, then upload all files in one POST.
#
# Globals:   NETRC_DIR, DRY, QUIET, CURL_NETRC_FILE_OPTION, CDDIS_URL, HOME
# Arguments: files to upload (each becomes one 'file[]' form field)
# Effects:   writes the session cookies to $HOME/.urs_cookies; temporarily
#            sets umask 0077 and restores the previous value at the end
# Exits via fatal if curl, the .netrc file, or its urs.earthdata.nasa.gov
# entry is missing.
#
# NOTE(review): the curl command lines are built with eval, so arguments are
# parsed by the shell a second time; file names containing whitespace or
# shell metacharacters are presumably not handled -- confirm before relying
# on such names.
cddis() {
    # Check if curl is installed.
    if ! which curl > /dev/null; then
        fatal "'curl' not found"
    fi

    if [[ ! -e "$NETRC_DIR/.netrc" ]]; then
        fatal "$NETRC_DIR/.netrc not found, see usage"
    fi

    if ! grep -q urs.earthdata.nasa.gov "$NETRC_DIR/.netrc"; then
        fatal "$NETRC_DIR/.netrc does not contain CDDIS login information, see usage"
    fi

    # DRY is used as a prefix of the eval'd command lines below. When it is
    # empty it is replaced by a stdout redirection, so curl's output is
    # discarded; when it is non-empty (main sets it to "echo" for '-t') the
    # command line is handed to that command instead of being run.
    local DRY=${DRY:-" >/dev/null"}

    info "Logging into CDDIS..."
    # Curl flags used:
    # -c     -- cookie jar (write)
    # -b     -- cookie jar (read)
    # -n     -- use netrc file for logins
    # -f     -- set return flag to 22 on HTTP error
    # -F     -- form data
    # -k     -- insecure mode (required on older versions of Debian), should
    #           be removed eventually
    # -s -S  -- silent except on errors

    # Disallow reading cookie file for other users
    local mask=$(umask)
    umask 0077

    # Step 1: log in; the session cookies are stored in $HOME/.urs_cookies.
    eval $DRY curl\
        -c $HOME/.urs_cookies \
        -n \
        -k \
        -f -s -S \
        -L \
        $CURL_NETRC_FILE_OPTION \
        $CDDIS_URL/login 

    info "Done"

    info "Copying file to CDDIS..."

    # In quiet mode curl's progress output is suppressed as well.
    local quiet=""
    if [[ -n "$QUIET" ]]; then 
        local quiet="-s -S"
    fi


    # Step 2: upload. fmtarray emits one ' -F "file[]=@<path>"' per argument;
    # the embedded quotes are processed by eval.
    eval $DRY curl -X POST \
        -b $HOME/.urs_cookies \
        -k \
        -f \
        $quiet \
        -F "fileType=VLBI" \
        $(fmtarray ' -F "file[]=@%s"' $@) \
        $CDDIS_URL/upload/
    info "Done"

    # Restore previous umask
    umask $mask
}

# Upload endpoint of the OPAR data center.
OPAR_URL="https://ivsopar.obspm.fr/upload/"
# Data center command for OPAR: send every file with its own form POST.
#
# Globals:   NETRC_DIR, DRY, CURL_NETRC_FILE_OPTION, OPAR_URL (read)
# Arguments: files to upload
# Exits via fatal if curl, the .netrc file, or its ivsopar.obspm.fr entry is
# missing.
opar() {
    # Preconditions: curl available, .netrc present and mentioning OPAR.
    which curl > /dev/null || fatal "'curl' not found"
    [[ -e "$NETRC_DIR/.netrc" ]] || fatal "$NETRC_DIR/.netrc not found, see usage"
    grep -q ivsopar.obspm.fr "$NETRC_DIR/.netrc" \
        || fatal "$NETRC_DIR/.netrc does not contain OPAR login information, see usage"

    # An empty DRY becomes a stdout redirection inside the eval'd command;
    # a non-empty DRY (e.g. "echo") receives the command line instead.
    local DRY=${DRY:-" >/dev/null"}

    info "Copying files to OPAR..."
    local upload
    for upload in "$@"; do
        eval $DRY curl \
            -n \
            -k \
            $CURL_NETRC_FILE_OPTION \
            -F "fichier=@$upload" -F 'mode=upload' $OPAR_URL
    done
    info "Done"
}

## Commands for Data Centers
# Each PLOG_CTR_<NAME> variable holds the command (function name plus any
# leading arguments) that uploads files to data center <NAME>; the files to
# upload are appended when the command is run.
PLOG_CTR_CDDIS="cddis"
PLOG_CTR_OPAR="opar"
PLOG_CTR_BKG="pftp ivs.bkg.bund.de"
PLOG_CTR_HAYSTACK="pscp evlbi1.haystack.mit.edu:/data-st12/vgos/logs"
PLOG_CTR_DUMMY="dummy"

# Hack to work around older versions of bash not having associative arrays:
# every variable whose name starts with "PLOG_CTR_" (including ones coming
# from the environment) is treated as an entry, and the rest of the name is
# the key. ${!PLOG_CTR_@} matches on the prefix only, so variables that merely
# contain "PLOG_CTR_" somewhere in their name are not picked up.
# CTRS ends up as the space-separated list of keys.
CTRS=
for _plog_ctr_name in "${!PLOG_CTR_@}"; do
    CTRS="${CTRS:+$CTRS }${_plog_ctr_name#PLOG_CTR_}"
done
unset _plog_ctr_name


# Default Data center
# DATA_CENTERS is normalised to upper case so it matches the PLOG_CTR_<NAME>
# variable names.
DATA_CENTERS=${DATA_CENTERS:-"CDDIS"}
DATA_CENTERS=$(echo "$DATA_CENTERS" | tr '[:lower:]' '[:upper:]')

# Transfer method for BKG: "ftp-ssl" (default) makes curl require TLS,
# "ftp" restores plain FTP. Anything else is a configuration error, reported
# on stderr so it cannot be mistaken for regular output.
PLOG_BKG_METHOD=${PLOG_BKG_METHOD:-'ftp-ssl'}

case "$PLOG_BKG_METHOD" in
    ftp-ssl)
        SSL_REQUIRED=--ssl-reqd
        ;;
    ftp)
        SSL_REQUIRED=
        ;;
    *)
        echo "Unknown bkg method: $PLOG_BKG_METHOD." >&2
        exit 1
        ;;
esac

# Join the arguments into a human readable enumeration on stdout:
# "a b c" becomes "a, b or c", "a b" becomes "a or b".
joinlst () {
    local IFS=" "
    local joined="$*"
    # First turn every blank into ", ", then rewrite the last separator.
    echo "$joined" | sed -e 's/ /, /g' -e 's/, \(\S*\)$/ or \1/g'
}

# Compress a log into /tmp and print the path of the result on stdout.
#
# A file whose last extension already equals PLOG_COMPRESSED_EXT is not
# compressed again; its own path is printed instead. In test mode (DRY
# non-empty) the command is only shown on stderr and nothing is written.
# Globals:   PLOG_COMPRESSED_EXT, DRY (read)
# Arguments: $1 - log file
compress() {
    # Do not compress already compressed files
    if [[ ${1##*.} == $PLOG_COMPRESSED_EXT ]]; then
        echo "$1"
        return
    fi

    # bzip2 for "bz2", gzip for everything else.
    local compress_program=gzip
    if [[ "$PLOG_COMPRESSED_EXT" =~ bz2 ]]; then
        compress_program=bzip2
    fi

    local out="/tmp/$(basename "$1").$PLOG_COMPRESSED_EXT"

    info Compressing log file...
    if [[ -z "$DRY" ]]; then
        safe_pv "$1" | "$compress_program" --best > "$out"
    else
        >&2 echo "$compress_program --best -c $1 >  $out"
    fi
    echo "$out"
}
# Uncompress the log to a temp directory
#
# Writes /tmp/<name without the .PLOG_COMPRESSED_EXT suffix> and prints that
# path on stdout. In test mode (DRY non-empty) the command is only shown on
# stderr and nothing is written.
# Globals:   PLOG_COMPRESSED_EXT, DRY (read)
# Arguments: $1 - compressed log file
uncompress() {
    local name out uncompress_program
    # Quoted so a path containing whitespace is passed to basename as one
    # operand.
    name=$(basename "$1" ".$PLOG_COMPRESSED_EXT")
    out="/tmp/$name"

    if [[ "$PLOG_COMPRESSED_EXT" =~ bz2 ]]; then
        uncompress_program=bunzip2
    else
        uncompress_program=gunzip
    fi

    info Uncompressing log file...
    if [[ -n "$DRY" ]]; then
        >&2 echo "$uncompress_program -c $1 >  $out"
    else
        safe_pv "$1" | "$uncompress_program" > "$out"
    fi
    echo "$out"
}
# Create a reduced log (all lines matching PLOG_REDUCE_PATTERN removed) in
# /tmp and print its path on stdout. The input can be a raw or a compressed
# log. The output name is the input's base name without its extension(s) and
# without PLOG_FULL_SUFFIX, plus ".log".
# In test mode (DRY non-empty) the command is only shown on stderr.
# Globals:   PLOG_FULL_SUFFIX, PLOG_COMPRESSED_EXT, PLOG_REDUCE_PATTERN,
#            egrep_program, DRY (read)
# Arguments: $1 - log file
reduce() {
    local base ext name out
    # Work on the file name only: taking the extension from the whole path
    # would start at the first dot of a dotted directory (e.g. /data/log.d/).
    base=${1##*/}
    ext=${base#*.}
    name=$(basename "$(basename "$base" ".$ext")" "$PLOG_FULL_SUFFIX")
    out="/tmp/$name.log"

    local -a filter
    if [[ "$PLOG_COMPRESSED_EXT" =~ bz2 && "$base" != *.bz2 ]]; then
# bunzip2 (via bzegrep) doesn't pass the first 5000 bytes of an uncompressed
# file from stdin (seems like a bug), so use grep -E directly as a work around
        filter=(grep -E)
    else
        filter=("$egrep_program")
    fi

    info Creating reduced log file...
    if [[ -n "$DRY" ]]; then
        >&2 echo "${filter[*]} -v '$PLOG_REDUCE_PATTERN' $1 > $out"
    else
        safe_pv "$1" | "${filter[@]}" -v "$PLOG_REDUCE_PATTERN" > "$out"
    fi
    echo "$out"
}

# Find latest modified log in $FS_LOG_PATH which isn't station or point
#
# Prints the experiment name, i.e. the log's file name without extension(s),
# without PLOG_FULL_SUFFIX and without the trailing station code.
# STATION is accepted in upper or lower case; log names use lower case.
# Globals:   STATION, FS_LOG_PATH, PLOG_FULL_SUFFIX, PLOG_COMPRESSED_EXT
# Exits via fatal if STATION is not set or no matching log exists (instead
# of printing a meaningless name).
get_latest_exp() {
    if [[ -z "${STATION:-}" ]]; then
        fatal "STATION environment variable not set. Set to two-letter station id."
    fi

    local station log ext expname
    station=$(echo "$STATION" | tr '[:upper:]' '[:lower:]')

    # Newest first; keep names ending in <station>[<full suffix>].log[.<ext>]
    # and drop anything that looks like station.log / point.log.
    # '|| true': an empty result (or head closing the pipe early under
    # pipefail) is handled by the explicit check below.
    log=$(ls -t "$FS_LOG_PATH" \
        | grep -E "${station}(${PLOG_FULL_SUFFIX})?\\.log(\\.${PLOG_COMPRESSED_EXT})?\$" \
        | grep -E -v '(station|point)' \
        | head -n 1) || true

    if [[ -z "$log" ]]; then
        fatal "no log file for station '$station' found in $FS_LOG_PATH"
    fi

    # Strip, in order: extension(s), full suffix, station code.
    ext=${log#*.}
    expname=$(basename "$log" ".$ext")
    expname=$(basename "$expname" "$PLOG_FULL_SUFFIX")
    expname=$(basename "$expname" "$station")
    echo "$expname"
}

# Abort (via fatal) unless STATION is set, then normalise it to lower case
# in place. Needed whenever a log is looked up by experiment name.
_plog_require_station() {
    if [[ -z "${STATION:-}" ]]; then
        fatal "STATION environment variable not set. Set to two-letter station id."
    fi
    STATION=$(echo "$STATION" | tr '[:upper:]' '[:lower:]')
}

# Entry point: parse the options, turn every ARG (experiment ID or file path)
# into the file(s) to upload, then run the upload command of each selected
# data center with the whole queue.
#
# Globals written: CENTERS_OVER, LATEST, DRY, PUSH_PROC, COMPRESS, QUIET,
#                  BOTH, NEW_CENTER, PLOG_COMPRESSED_EXT, egrep_program,
#                  DATA_CENTERS, REFS, FILES, FULL, STATION
# Arguments:       the script's command line
# Exits with status 1 (via fatal or after printing the usage) on any error.
main () {
    #STATION = Two letter station code, eg "gs"
    CENTERS_OVER=
    LATEST=
    DRY=
    PUSH_PROC=
    COMPRESS=
    QUIET=
    BOTH=1

    # Local OPTIND so main can be called more than once (e.g. when sourced).
    local opt OPTARG
    local OPTIND=1
    # Leading ':' selects getopts' silent mode, which is what makes the
    # ':' (missing argument) arm below reachable.
    while getopts ':hltqpzrvc:g' opt; do
        case "$opt" in
            l)
                LATEST=1
                ;;
            t)
                DRY=echo
                ;;
            c)
                NEW_CENTER=$(echo "$OPTARG" | tr '[:lower:]' '[:upper:]')
                # Whole-word comparison: a fragment such as "CD" must not
                # pass just because it occurs inside "CDDIS".
                if [[ " $CTRS " != *" $NEW_CENTER "* ]]; then
                    fatal "Unknown data center '$NEW_CENTER'"
                fi
                CENTERS_OVER="$CENTERS_OVER $NEW_CENTER"
                ;;
            p)
                PUSH_PROC=1
                ;;
            q)
                QUIET=1
                ;;
            z)
                COMPRESS=1
                ;;
            r)
                BOTH=
                ;;
            h)
                usage_long
                exit
                ;;
            v)
                version
                exit
                ;;
            g)
                PLOG_COMPRESSED_EXT=gz
                ;;
            :)
                fatal "Option -$OPTARG requires an argument."
                ;;
            *)
                echo "plog: illegal option -- $OPTARG" >&2
                usage >&2
                exit 1
                ;;
        esac
    done
    shift $((OPTIND - 1))

    # Conflict check
    if [[ -n "$COMPRESS" && -z "$BOTH" ]]; then
        fatal "Can't use '-r' and '-z' together"
    fi

    # Check compression type: prefer bzip2 when it is installed, unless the
    # environment or '-g' already chose an extension.
    if command -v bzip2 >/dev/null 2>&1; then
        PLOG_COMPRESSED_EXT="${PLOG_COMPRESSED_EXT:-bz2}"
    else
        PLOG_COMPRESSED_EXT="${PLOG_COMPRESSED_EXT:-gz}"
    fi

    # grep wrapper able to read logs compressed with the chosen method.
    case "$PLOG_COMPRESSED_EXT" in
        bz2)
            egrep_program=bzegrep
            ;;
        gz)
            egrep_program=zegrep
            ;;
        *)
            fatal "PLOG_COMPRESSED_EXT must be bz2 or gz, was $PLOG_COMPRESSED_EXT"
            ;;
    esac

    # Default DATA_CENTERS overridden by flag
    if [[ -n "$CENTERS_OVER" ]]; then
        DATA_CENTERS=$CENTERS_OVER
    fi

    # Every selected center needs a non-empty PLOG_CTR_<NAME> command.
    # The name is checked first so the indirect expansion only ever sees a
    # valid variable name. DATA_CENTERS is a blank-separated list, hence the
    # unquoted expansion.
    local center ctr_var
    for center in $DATA_CENTERS; do
        if [[ ! "$center" =~ ^[A-Za-z0-9_]+$ ]]; then
            fatal "unknown data center '$center'"
        fi
        ctr_var="PLOG_CTR_$center"
        if [[ -z "${!ctr_var:-}" ]]; then
            fatal "unknown data center '$center'"
        fi
    done

    # If LATEST not set and no arguments, exit
    if [[ -z "$LATEST" && "$#" -eq 0 ]]; then
        usage >&2
        exit 1
    fi

    # List of files or experiment names
    REFS=("$@")
    # List of existing files that will be pushed
    FILES=()

    local llog
    if [[ -n "$LATEST" ]]; then
        # The lookup is by station code, so STATION must be valid (and lower
        # case) before searching, not only afterwards.
        _plog_require_station
        llog=$(get_latest_exp)
        if [[ -z "$llog" ]]; then
            fatal "no log file for station '$STATION' found in $FS_LOG_PATH"
        fi
        REFS+=( "$llog" )
    fi

    local ref proc f logfile logext full
    local -a log
    for ref in "${REFS[@]}"; do
        # First check if the argument looks like a path.
        # If does, check if it is a file.
        # If it's a file, do not process it, just add it to
        # the list; if it isn't, complain and exit.
        if [[ "$ref" == */* ]]; then
            if [[ -f "$ref" ]]; then
                add "$ref"
                continue
            else
                fatal "file $ref not found"
            fi
        fi

        # If it's not a file, assume it's the name of an observation.

        # we need the name of the station if we're going to lookup by observation name
        _plog_require_station

        # Should we add the proc file?
        if [[ -n "$PUSH_PROC" ]]; then
            proc="$FS_PROC_PATH/$ref$STATION.prc"
            if [[ ! -f "$proc" ]]; then
                fatal "$proc not found"
            fi
            add "$proc"
        fi

        # Look for a regular log, a full log, or a compressed version of either.
        # If more than one exists, abort and ask the user to deal with it
        log=()
        for f in "$FS_LOG_PATH/$ref$STATION"{,"$PLOG_FULL_SUFFIX"}.log{,".$PLOG_COMPRESSED_EXT"}; do
            if [[ -f "$f" ]]; then
                info found "$f"
                log+=( "$f" )
            fi
        done
        if [[ ${#log[@]} -lt 1 ]]; then
            fatal "log file for $ref not found"
        fi
        if [[ ${#log[@]} -gt 1 ]]; then
            fatal "Conflicting log files:"\
                  "\n$(IFS=$'\n';echo -e "${log[*]}")\n"\
                  "Inspect the files and either concatenate them or removed the invalid one(s)."
        fi
        logfile=${log[0]}

        # If log contains multicast data, rename to _full.* since we will upload a reduced file
        FULL=
        if "$egrep_program" -q "$PLOG_REDUCE_PATTERN" "$logfile"; then
            FULL=1
            # extension, preserving the compression suffix if it exists
            case "$logfile" in
                *".log.$PLOG_COMPRESSED_EXT") logext="log.$PLOG_COMPRESSED_EXT" ;;
                *)                            logext="log" ;;
            esac
            full=$FS_LOG_PATH/$ref$STATION$PLOG_FULL_SUFFIX.$logext
            if [[ "$logfile" != "$full" ]]; then
                if fuser -v "$logfile" 2>&1 | grep -q " ddout$" ; then
                    fatal "$logfile is open in the FS (ddout), close it first; aborting"
                fi
                # This should be safe, because we checked if file exists earlier.
                warn file contains multicast, renaming "$(basename "$logfile")" to "$(basename "$full")"
                $DRY mv "$logfile" "$full"
            fi
            logfile=$full
        fi

        # If given '-z' command, compressed file, add it to the queue, and move on
        if [[ -n "$COMPRESS" ]]; then
            add "$(compress "$logfile")"
            continue
        fi

        # If log doesn't contains data that should be excluded in reduced log, add it to queue and move on.
        if [[ -z "$FULL" ]]; then
            # If log is compressed, extract in a tmp dir upload
            if [[ "$logfile" == *".$PLOG_COMPRESSED_EXT" ]]; then
                add "$(uncompress "$logfile")"
                continue
            fi
            add "$logfile"
            continue
        fi

        # The log must now contain multicast data, so generate reduced log and add it to the queue
        add "$(reduce "$logfile")"
        # If not given '-r' also send a compressed log.
        if [[ -n "$BOTH" ]]; then
            add "$(compress "$logfile")"
        fi
    done

    if [[ ${#FILES[@]} -eq 0 ]]; then
        fatal "no files to upload"
    fi

    for center in $DATA_CENTERS; do
        info uploading to "$center"
        ctr_var="PLOG_CTR_$center"
        # The stored command may carry leading arguments ("pftp <host>"), so
        # it is expanded unquoted and word-split; the queued files are passed
        # as-is, without being re-parsed by eval.
        ${!ctr_var} "${FILES[@]}"
    done
}

# Run main only when this file is executed, not when it is "source"d
# (sourcing is useful for testing the individual functions).
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    # 'strict' mode:
    #   errexit  - exit on error
    #   nounset  - error on undefined variable
    #   pipefail - error if any command in a pipe fails
    set -o errexit -o nounset -o pipefail
    main "$@"
fi
